%load_ext autoreload
%autoreload 2
import sys
sys.path.insert(0, "/home/sturm/projects/2020/scirpy/")
from glob import glob
from multiprocessing import Pool
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scanpy as sc
import scanpy.external as sce
import scirpy as ir
from scipy import stats
import anndata
import warnings
import os
import re
warnings.filterwarnings("ignore", category=FutureWarning)
sc.settings.verbosity = 0
# suppress "storing XXX as categorical" warnings.
anndata.logging.anndata_logger.setLevel("ERROR")
input_file = "../results/05_prepare_adata_nk_t/adata.h5ad"
output_dir = "./tmp"
n_cpus = 32
# Parameters
input_file = "input_adata.h5ad"
n_cpus = "42"
output_dir = "."
tcr_dir = "cellranger"
n_cpus = int(n_cpus)
os.makedirs(output_dir, exist_ok=True)
def natural_sort(l):
convert = lambda text: int(text) if text.isdigit() else text.lower()
alphanum_key = lambda key: [convert(c) for c in re.split("([0-9]+)", key)]
return sorted(l, key=alphanum_key)
def define_public_clonotypes(
adata, clonotype_key, patient_key="patient", key_added="is_public"
):
"""Identify 'public' clonotypes based on the clonotype and patient annotation.
'public clonotypes' are clonotypes with identical/similar CDR3 sequences across patients."""
public_df = (
adata.obs.loc[:, [clonotype_key, patient_key]]
.groupby([clonotype_key, patient_key], observed=True)
.size()
.reset_index()
.groupby(clonotype_key)
.size()
.reset_index()
)
public_clonotypes = public_df.loc[public_df[0] > 1, clonotype_key]
result = pd.Categorical(
[
"public" if x else "private"
for x in adata.obs[clonotype_key].isin(public_clonotypes)
],
categories=["public", "private"],
)
adata.obs[key_added] = result
adata = sc.read_h5ad(input_file)
As a first step, we have a look at the "chain pairing" configuration, i.e how many CDR3 sequence have been detected per cell.
Here,
Multi-chains have already been removed in a previous filtering step.
_ = ir.pl.group_abundance(adata, groupby="chain_pairing", target_col="patient")
sc.pl.umap(adata, color="chain_pairing")
UMAP before removal:
sc.pl.umap(adata, color=["cluster", "cell_type"], legend_loc="on data")
adata.shape
(17062, 16729)
adata = adata[
(adata.obs["has_ir"] == "True")
& (~adata.obs["chain_pairing"].str.startswith("orphan"))
& (adata.obs["cell_type"] != "NK cell"),
:,
]
adata._sanitize()
adata.shape
(10734, 16729)
UMAP after removal:
sc.pl.umap(adata, color=["cluster", "cell_type"], legend_loc="on data")
For now, if a TCR has two alpha sequences, any of the two (plus the beta chain) needs to match an other cell to be considered a clonotype, or clonotype cluster, respectively
# based on sequence identity
ir.pp.ir_neighbors(adata, receptor_arms="all", dual_ir="any")
100%|██████████| 9302/9302 [00:00<00:00, 14321.85it/s] 100%|██████████| 8495/8495 [00:00<00:00, 19415.63it/s] 100%|██████████| 117784/117784 [00:01<00:00, 78721.69it/s]
ir.tl.define_clonotypes(adata)
# based on sequence similarity
ir.pp.ir_neighbors(
adata,
metric="alignment",
sequence="aa",
receptor_arms="all",
dual_ir="any",
cutoff=10,
n_jobs=n_cpus,
)
100%|██████████| 16653/16653 [00:31<00:00, 535.71it/s] 100%|██████████| 14365/14365 [00:27<00:00, 523.05it/s] 100%|██████████| 20984/20984 [00:01<00:00, 20184.73it/s] 100%|██████████| 10707/10707 [00:00<00:00, 19620.30it/s] 100%|██████████| 169220/169220 [00:02<00:00, 80959.19it/s]
ir.tl.define_clonotype_clusters(adata, sequence="aa", metric="alignment")
# ## store intermediate results ("cache")
# adata.write_h5ad("./tmp/tmp_adata.h5ad")
# adata = sc.read_h5ad("./tmp/tmp_adata.h5ad")
The following plot visualizes the clonotypes. Each "blob" represents a clonotype, each dot a cell.
Clonotypes of size 1 ("singletons" or "non-expanded" clonotypes) are excluded for a better overview.
ir.tl.clonotype_network(adata, min_size=2, layout="components")
_ = ir.pl.clonotype_network(
adata,
color=["patient", "cell_type"],
legend_loc="right margin",
edges=False,
size=80,
legend_fontoutline=3,
)
adata.obs["clonotype"] = adata.obs["clonotype"].str.replace("_TCR", "")
adata.obs["ct_cluster_aa_alignment"] = adata.obs["ct_cluster_aa_alignment"].str.replace(
"_TCR", ""
)
_ = ir.pl.clonotype_network(
adata,
color=["clonotype"],
size=80,
legend_fontoutline=3,
)
sc.pl.umap(adata, color=["cell_type"])
sc.pl.umap(adata, color=["CD8A", "CD4"], cmap="inferno")
ct_of_interest = ["842", "7284", "7283"]
sc.pl.umap(
adata,
color="clonotype",
groups=ct_of_interest,
size=[60 if x in ct_of_interest else 4 for x in adata.obs["clonotype"]],
)
There are no public clonotypes:
define_public_clonotypes(adata, "clonotype", "patient", key_added="is_public_clonotype")
print(
f"Number of public clonotypes based on nucleotide sequence identity: {np.sum(adata.obs['is_public_clonotype'] == 'public')}"
)
Number of public clonotypes based on nucleotide sequence identity: 0
The left panel shows the clonal expansion by the number of cells, the right panel by the number of clonotypes. I.e. if expansion is driven by a single, hyperexpanded clonotype, significant expansion will be visible in the left panel, but not in the right one.
def plot_clonal_expansion(adata, groupby="patient", title_extra=None):
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
ir.pl.clonal_expansion(
adata,
groupby,
normalize=False,
ax=ax1,
style=None,
)
ir.pl.clonal_expansion(
adata, groupby, normalize=False, summarize_by="clonotype", ax=ax2, style=None
)
_ = ax2.set_ylabel("number of clonotypes")
_ = ax1.set_ylabel("number of cells")
title = f"Clonal expansion by {groupby}"
if title_extra is not None:
title += " " + title_extra
fig.suptitle(title)
def plot_entropy_by_cell_type(adata, **kwargs):
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(14, 4))
ir.pl.alpha_diversity(adata, "patient", style=None, ax=ax1, **kwargs)
ir.pl.alpha_diversity(
adata[adata.obs["cell_type"] == "T CD8+", :],
"patient",
style=None,
ax=ax2,
**kwargs,
)
ir.pl.alpha_diversity(
adata[adata.obs["cell_type"].isin(("T CD4+", "T reg.")), :],
"patient",
style=None,
ax=ax3,
**kwargs,
)
ax1.set_title("all cells")
ax2.set_title("CD8+ T cells")
ax3.set_title("CD4+ T cells")
for ax in [ax1, ax2, ax3]:
ax.get_legend().remove()
_ = fig.suptitle("Normalized shannon diversity")
def make_contingency_table(adata, col1, col2):
col1_cat = np.unique(adata.obs[col1].values)
col2_cat = np.unique(adata.obs[col2].values)
if len(col1_cat) != 2:
raise ValueError("Category1 != 2 items")
if len(col2_cat) != 2:
raise ValueError("Category1 != 2 items")
ct = pd.DataFrame(
np.array(
[
[
np.sum(
(adata.obs[col1] == col1_cat[0])
& (adata.obs[col2] == col2_cat[0])
),
np.sum(
(adata.obs[col1] == col1_cat[0])
& (adata.obs[col2] != col2_cat[0])
),
],
[
np.sum(
(adata.obs[col1] != col1_cat[0])
& (adata.obs[col2] == col2_cat[0])
),
np.sum(
(adata.obs[col1] != col1_cat[0])
& (adata.obs[col2] != col2_cat[0])
),
],
]
)
)
ct.columns = col2_cat
ct.index = col1_cat
return ct
def multi_group_abundance(adata, target_col, groupby=None, **kwargs):
"""Plot multiple group abundance plots side-by-side.
target col is the same for all plots.
groupby is a list with multiple variables.
"""
fig, axs = plt.subplots(1, len(groupby), figsize=(14, 4))
for group, ax in zip(groupby, axs):
ir.pl.group_abundance(adata, group, target_col, ax=ax, style=None, **kwargs)
for ax in axs[:-1]:
ax.get_legend().remove()
axs[-1].legend(bbox_to_anchor=(1.6, 1.05))
return fig, axs
def export_alpha_diversity(adata, target_col="clonotype", out_dir="tmp"):
adata_cd4 = adata[adata.obs["cell_type"].isin(("T CD4+", "T reg.")), :]
adata_cd8 = adata[adata.obs["cell_type"] == "T CD8+", :]
by_patient = pd.concat(
[
ir.tl.alpha_diversity(
adata, groupby="patient", target_col=target_col, inplace=False
),
ir.tl.alpha_diversity(
adata_cd8, groupby="patient", target_col=target_col, inplace=False
),
ir.tl.alpha_diversity(
adata_cd4, groupby="patient", target_col=target_col, inplace=False
),
],
axis=1,
)
by_patient.columns = ["all cells", "CD8+ T cells", "CD4+ T cells"]
by_cluster = pd.concat(
[
ir.tl.alpha_diversity(
adata, groupby="cluster", target_col=target_col, inplace=False
),
ir.tl.alpha_diversity(
adata_cd8, groupby="cluster", target_col=target_col, inplace=False
),
ir.tl.alpha_diversity(
adata_cd4, groupby="cluster", target_col=target_col, inplace=False
),
],
axis=1,
)
by_cluster.columns = ["all cells", "CD8+ T cells", "CD4+ T cells"]
by_patient.to_csv(
out_dir + "/" + f"normalized_shannon_entropy_{target_col}_by_patient.tsv",
sep="\t",
)
by_cluster.to_csv(
out_dir + "/" + f"normalized_shannon_entropy_{target_col}_by_cluster.tsv",
sep="\t",
)
plot_clonal_expansion(adata)
plot_clonal_expansion(
adata[adata.obs["cell_type"] == "T CD8+", :], title_extra="(CD8+ T cells)"
)
plot_clonal_expansion(
adata[adata.obs["cell_type"].isin(("T CD4+", "T reg.")), :],
title_extra="(CD4+ T cells)",
)
plot_entropy_by_cell_type(adata)
export_alpha_diversity(adata, "clonotype", out_dir=output_dir)
H143. It is responsible for a majority of expanded and convergent cells. ir.tl.clonotype_network(
adata, min_size=2, layout="components", metric="alignment", sequence="aa"
)
_ = ir.pl.clonotype_network(
adata,
color=["patient", "cell_type"],
legend_loc="right margin",
edges=False,
size=80,
legend_fontoutline=3,
)
Note that, based on nucleotide sequence identity, we did not find T cells with identical CDR3 sequences across patients. Therefore, the clonotypes identified as public here, consist of different, but highly similar CDR3 amino acid sequences.
define_public_clonotypes(adata, "ct_cluster_aa_alignment", key_added="is_public")
_ = ir.pl.clonotype_network(
adata,
color=["is_public", "hpv_status"],
legend_loc="right margin",
edges=False,
size=80,
legend_fontoutline=3,
)
contingency_table = make_contingency_table(adata, "is_public", "hpv_status")
contingency_table
| HPV- | HPV16+ | |
|---|---|---|
| private | 1597 | 9038 |
| public | 5 | 94 |
_, p = stats.fisher_exact(contingency_table)
print(f"Fisher's exact test of contingency table: p = {p:.4f}")
Fisher's exact test of contingency table: p = 0.0041
contingency_table = make_contingency_table(
adata[adata.obs["ir_status"] != "nan", :], "is_public", "ir_status"
)
contingency_table
| IR+ | IR- | |
|---|---|---|
| private | 6111 | 2927 |
| public | 76 | 18 |
_, p = stats.fisher_exact(contingency_table)
print(f"Fisher's exact test of contingency table: p = {p:.4f}")
Fisher's exact test of contingency table: p = 0.0054
We define a clonotype as convergent if multiple clonotypes with different nucleotide CDR3 sequences fall into the same "clonotype cluster", i.e. have similar amino acid sequences and likely recognize the same antigen.
This could be evidence of convergent evolution. See also scirpy glossary.
ir.tl.clonotype_convergence(
adata, key_coarse="ct_cluster_aa_alignment", key_fine="clonotype"
)
_ = ir.pl.clonotype_network(
adata,
color=["is_convergent", "clonotype"],
legend_loc=["right margin", "none"],
edges=False,
size=80,
legend_fontoutline=3,
ncols=2,
)
We observe convergence mostly among CD8+ T cells and among HPV- cells:
sc.pl.umap(
adata,
color="is_convergent",
groups=["convergent"],
size=[10 if x == "convergent" else 3 for x in adata.obs["is_convergent"]],
)
fig, ax = multi_group_abundance(
adata, "is_convergent", ["cell_type", "hpv_status", "ir_status"], normalize=True
)
_ = fig.suptitle("clonotype convergence")
However, this seems mostly driven by Patient H143.
sc.pl.umap(adata, color=["hpv_status", "patient"])
contingency_table = make_contingency_table(
adata[(adata.obs["cell_type"] == "T CD8+"), :],
"is_convergent",
"hpv_status",
)
contingency_table
| HPV- | HPV16+ | |
|---|---|---|
| convergent | 232 | 177 |
| not convergent | 349 | 2734 |
_, p = stats.fisher_exact(contingency_table)
print(f"Fisher's exact test of contingency table: p = {p:.4f}")
Fisher's exact test of contingency table: p = 0.0000
contingency_table = make_contingency_table(
adata[
(adata.obs["cell_type"] == "T CD8+") & (adata.obs["ir_status"] != "nan"),
:,
],
"is_convergent",
"ir_status",
)
contingency_table
| IR+ | IR- | |
|---|---|---|
| convergent | 120 | 57 |
| not convergent | 2118 | 616 |
_, p = stats.fisher_exact(contingency_table)
print(f"Fisher's exact test of contingency table: p = {p:.4f}")
Fisher's exact test of contingency table: p = 0.0042
We define "expanded" clonotypes as clonotypes with two (=2) or more than two (>= 3)
cells.
ir.tl.clonal_expansion(adata, target_col="ct_cluster_aa_alignment")
adata.obs["is_expanded"] = pd.Categorical(
["expanded" if x != "1" else "not expanded" for x in adata.obs["clonal_expansion"]]
)
Again, expanded clonotypes are mostly among CD8+ T cells. There are a few expanded regulatory T-cells as well.
sc.pl.umap(
adata,
color=["clonal_expansion", "cell_type", "CD8A"],
size=[10 if x != "1" else 3 for x in adata.obs["clonal_expansion"]],
)
_ = multi_group_abundance(
adata, "clonal_expansion", ["cell_type", "hpv_status", "ir_status"], normalize=True
)
ir.tl.group_abundance(adata, "cluster", "clonal_expansion", fraction=True)
| clonal_expansion | 1 | >= 3 | 2 |
|---|---|---|---|
| cluster | |||
| T CD4+ 2 | 0.923797 | 0.018717 | 0.057487 |
| T CD4+ 0 | 0.909871 | 0.035765 | 0.054363 |
| T CD4+ 1 | 0.978070 | 0.004386 | 0.017544 |
| T CD4+ 3 | 0.905185 | 0.026667 | 0.068148 |
| T CD4+ 4 | 0.995261 | 0.001580 | 0.003160 |
| T CD8+ 0 | 0.310345 | 0.544828 | 0.144828 |
| T CD4+ 5 | 0.923077 | 0.031136 | 0.045788 |
| T reg. 0 | 0.962825 | 0.003717 | 0.033457 |
| T CD8+ 1 | 0.349315 | 0.545662 | 0.105023 |
| T CD4+ 8 | 0.768868 | 0.075472 | 0.155660 |
| T CD4+ 7 | 0.969388 | 0.002551 | 0.028061 |
| T CD4+ 6 | 1.000000 | 0.000000 | 0.000000 |
| T reg. 1 | 0.844920 | 0.026738 | 0.128342 |
| T CD8+ 6 | 0.299728 | 0.588556 | 0.111717 |
| T CD8+ 3 | 0.269231 | 0.554945 | 0.175824 |
| T CD8+ 4 | 0.349570 | 0.524355 | 0.126074 |
| T CD4+ 9 | 0.946588 | 0.005935 | 0.047478 |
| T CD8+ 5 | 0.637255 | 0.264706 | 0.098039 |
| T CD8+ 7 | 0.395683 | 0.543165 | 0.061151 |
| T CD8+ 8 | 0.196154 | 0.723077 | 0.080769 |
| T CD8+ 2 | 0.150685 | 0.812785 | 0.036530 |
| T reg. 2 | 0.851163 | 0.060465 | 0.088372 |
| T CD8+ 9 | 0.401015 | 0.472081 | 0.126904 |
| T reg. 3 | 0.655172 | 0.262069 | 0.082759 |
| T reg. 4 | 0.804511 | 0.112782 | 0.082707 |
| T CD4+ 10 | 0.895161 | 0.064516 | 0.040323 |
| T CD8+ 10 | 0.205607 | 0.682243 | 0.112150 |
| T other 0 | 1.000000 | 0.000000 | 0.000000 |
| T reg. 5 | 0.910714 | 0.035714 | 0.053571 |
| T other 1 | 0.903226 | 0.000000 | 0.096774 |
| T CD8+ 11 | 0.481481 | 0.296296 | 0.222222 |
| T other 2 | 1.000000 | 0.000000 | 0.000000 |
for title, tmp_adata in (
("all cells", adata),
("CD4+ T cells", adata[adata.obs["cell_type"].isin(["T CD4+", "T reg."]), :]),
("CD8+ T cells", adata[adata.obs["cell_type"] == "T CD8+", :]),
):
fig, ax = plt.subplots(1, 1, figsize=(15, 5))
_ = ir.pl.group_abundance(
tmp_adata,
"cluster",
"clonal_expansion",
normalize=True,
ax=ax,
sort=natural_sort(np.unique(tmp_adata.obs["cluster"])),
)
ax.set_title(f"Clonal expansion per cluster: {title}")
adata_cd8 = adata[(adata.obs["cell_type"] == "T CD8+"), :]
contingency_table = make_contingency_table(
adata_cd8,
"is_expanded",
"hpv_status",
)
contingency_table
| HPV- | HPV16+ | |
|---|---|---|
| expanded | 408 | 1918 |
| not expanded | 173 | 993 |
_, p = stats.fisher_exact(contingency_table)
print(f"Fisher's exact test of contingency table: p = {p:.4f}")
Fisher's exact test of contingency table: p = 0.0432
A simple differential gene expresion analysis of expanded vs. other CD8 T cells yields the following genes:
sc.tl.rank_genes_groups(adata_cd8, "is_expanded", method="wilcoxon", use_raw=False)
sc.pl.rank_genes_groups(adata_cd8, ["expanded"])
contingency_table = make_contingency_table(
adata_cd8[(adata_cd8.obs["ir_status"] != "nan"), :],
"is_expanded",
"ir_status",
)
contingency_table
| IR+ | IR- | |
|---|---|---|
| expanded | 1492 | 426 |
| not expanded | 746 | 247 |
_, p = stats.fisher_exact(contingency_table)
print(f"Fisher's exact test of contingency table: p = {p:.4f}")
Fisher's exact test of contingency table: p = 0.1149
A simple differential gene expresion analysis of convergent vs. other CD8 T cells yields the following genes:
sc.tl.rank_genes_groups(adata_cd8, "is_convergent", method="wilcoxon", use_raw=False)
sc.pl.rank_genes_groups(adata_cd8, ["convergent"])
for tmp_adata, filename in [
(adata, "clonotype_clusters.tsv"),
(adata_cd8, "clonotype_clusters_cd8.tsv"),
]:
tmp_adata.obs.groupby(
[
"ct_cluster_aa_alignment",
"ct_cluster_aa_alignment_size",
"hpv_status",
"ir_status",
"is_convergent",
"is_expanded",
"is_public",
"clonal_expansion",
"patient",
],
observed=True,
).size().reset_index(name="cells_per_patient").to_csv(
os.path.join(output_dir, filename), sep="\t", index=False
)
plot_entropy_by_cell_type(adata, target_col="ct_cluster_aa_alignment")
export_alpha_diversity(adata, "ct_cluster_aa_alignment", out_dir=output_dir)
from matplotlib.gridspec import GridSpec
def network_plot_and_umap_expanded_clonotypes(
adata,
min_size,
umap_legend_loc="right margin",
network_panel_size=(3, 3),
figsize=(7, 7),
edges=False,
**kwargs
):
fig = plt.figure(figsize=figsize)
gs = GridSpec(2, 2, figure=fig)
ax1 = fig.add_subplot(gs[0, 0])
ax2 = fig.add_subplot(gs[0, 1])
ax3 = fig.add_subplot(gs[1, :])
ir.tl.clonotype_network(adata, min_size=min_size, sequence="aa", metric="alignment")
network_plot = ir.pl.clonotype_network(
adata,
color=["ct_cluster_aa_alignment", "patient"],
legend_loc=["on data", "right margin"],
edges=edges,
size=80,
legend_fontoutline=3,
ncols=3,
panel_size=network_panel_size,
ax=[ax1, ax2],
**kwargs
)
ct_ids = list(
set(
adata.obs.loc[
adata.obs["ct_cluster_aa_alignment_size"] >= min_size,
"ct_cluster_aa_alignment",
].tolist()
)
)
umap_plot = sc.pl.umap(
adata,
color="ct_cluster_aa_alignment",
groups=ct_ids,
size=[40 if c in ct_ids else 5 for c in adata.obs["ct_cluster_aa_alignment"]],
legend_loc=umap_legend_loc,
ax=ax3,
)
plt.show()
return ct_ids
ct_ids_20 = network_plot_and_umap_expanded_clonotypes(adata, 20)
ct_ids_10 = network_plot_and_umap_expanded_clonotypes(
adata, 10, network_panel_size=(5, 5), figsize=(10, 10)
)
ct_ids_5 = network_plot_and_umap_expanded_clonotypes(
adata, 5, network_panel_size=(5, 5), figsize=(10, 10), umap_legend_loc="none"
)
(for smaller cutoffs, the differential gene expression analysis does not work properly, as there will be to few cells per group)
sc.tl.rank_genes_groups(
adata, "ct_cluster_aa_alignment", groups=ct_ids_20, method="wilcoxon", use_raw=False
)
sc.pl.rank_genes_groups_matrixplot(
adata[adata.obs["ct_cluster_aa_alignment"].isin(ct_ids_20), :],
swap_axes=False,
n_genes=10,
standard_scale="var",
groups=ct_ids_20[:6],
)
sc.pl.rank_genes_groups_matrixplot(
adata[adata.obs["ct_cluster_aa_alignment"].isin(ct_ids_20), :],
swap_axes=False,
n_genes=10,
standard_scale="var",
groups=ct_ids_20[6:],
)
cols1 = [
"ct_cluster_aa_alignment",
"cell_type",
"cluster",
"is_convergent",
"is_expanded",
"is_public",
"clonal_expansion",
"ct_cluster_aa_alignment_size",
]
cols2 = cols1 + [
"patient",
"ir_status",
"hpv_status",
]
expr_cluster_ct_cluster_table = (
adata.obs.loc[
:,
cols1,
]
.groupby(cols1, observed=True)
.size()
.reset_index(name="n_cells")
)
expr_cluster_ct_cluster_table_by_patient = (
adata.obs.loc[
:,
cols2,
]
.groupby(cols2, observed=True)
.size()
.reset_index(name="n_cells")
)
expr_cluster_ct_cluster_table.to_csv(
os.path.join(output_dir, "expr_cluster_ct_cluster_table.tsv"), sep="\t", index=False
)
expr_cluster_ct_cluster_table_by_patient.to_csv(
os.path.join(output_dir, "expr_cluster_ct_cluster_table_by_patient.tsv"),
sep="\t",
index=False,
)
ir.pl.group_abundance(
adata[adata.obs["ct_cluster_aa_alignment"].isin(ct_ids_10), :],
"ct_cluster_aa_alignment",
"cluster",
normalize=True,
figsize=(14, 4),
)
<AxesSubplot:title={'center':'Fraction of cluster in each ct_cluster_aa_alignment'}, xlabel='ct_cluster_aa_alignment', ylabel='Fraction of cells in cluster'>
adata_cd4 = adata[adata.obs["cell_type"].isin(["T CD4+", "T reg."]), :]
sc.pl.umap(adata_cd4, color="cell_type")
ct_ids_cd4_10 = network_plot_and_umap_expanded_clonotypes(adata_cd4, 10, edges=True)
ct_ids_cd4_3 = network_plot_and_umap_expanded_clonotypes(
adata_cd4,
3,
network_panel_size=(7, 7),
figsize=(14, 14),
umap_legend_loc="none",
edges=True,
)
Comparison of clonotype 4243 vs other Tregs
adata.obs["ct_5243_comparison"] = "nan"
adata.obs.loc[
(adata.obs["ct_cluster_aa_alignment"] == "5243")
& (adata.obs["cell_type"] == "T reg."),
"ct_5243_comparison",
] = "ct_5243"
adata.obs.loc[
(adata.obs["ct_cluster_aa_alignment"] != "5243")
& (adata.obs["cell_type"] == "T reg."),
"ct_5243_comparison",
] = "other Tregs"
sc.tl.rank_genes_groups(
adata,
groupby="ct_5243_comparison",
groups=["ct_5243"],
reference="other Tregs",
use_raw=False,
)
sc.pl.rank_genes_groups(adata, groups=["ct_5243"])
adata.obs["cell_type_expansion"] = [
f"{cell_type} {expanded}"
for cell_type, expanded in zip(adata.obs["cell_type"], adata.obs["is_expanded"])
]
sc.pl.stacked_violin(
adata[adata.obs["cell_type"] != "T other", :],
var_names=["LTB", "BLK", "CXCL12", "CCL4"],
groupby="cell_type_expansion",
swap_axes=True,
cmap="YlOrRd",
save="expansion"
)
/opt/conda/lib/python3.8/site-packages/anndata/_core/anndata.py:1209: ImplicitModificationWarning: Initializing view as actual. warnings.warn(
adata.write_h5ad(f"{output_dir}/adata_tcr.h5ad")